* Load the first prepared file, then append the remaining four prepared files in order.
use "$data/allmergeprep50_ready.dta", clear
foreach extract in "$data/allmergeprep55.dta" "$data/allmergeprep60.dta" "$data/allmergeprep61.dta" "$data/allmergeprep66.dta" {
	append using "`extract'"
}

* Remove rows whose principal or subsidiary occupation code is one of the listed X-codes.
* NOTE(review): X00 is excluded for the principal code only - confirm that is intended.
drop if inlist(prin_acty_occ, "X01", "X02", "X09", "X10", "X99", "XXX", "X00")
drop if inlist(subs_acty_occ, "X01", "X02", "X09", "X10", "X99", "XXX")
destring subs_acty_occ, replace

*saveold "NSSdata_unemp_emp_survey", replace
*drop if year==200304
* Keep ages 18 through 65; rows with missing age are removed as well.
drop if age < 18 | age > 65

*Treated state - Andhra Pradesh
* treat is 1 for state code 28 and 0 otherwise
gen treat = state == 28

* post is 1 for the 200304, 200405 and 200910 values of year and 0 otherwise
g post = inlist(year, 200304, 200405, 200910)

g posttreat = treat*post
egen stateyear = group(state year)

* Industry codes: for year 199394 the divisor is ten times smaller than for the other years
gen indcode = cond(year==199394, int(prin_acty_ind/100), int(prin_acty_ind/1000))
gen threedigitind = cond(year==199394, int(prin_acty_ind/10), int(prin_acty_ind/100))

*Working status of the worker
* wageworker flags principal-activity status codes 31 through 51
gen wageworker = prin_acty_stat >= 31 & prin_acty_stat <= 51

* Drop status codes above 51; missing status is dropped too because missing sorts above every number
drop if prin_acty_stat > 51
* sample_nohh flags status codes 21 through 51
gen sample_nohh = prin_acty_stat >= 21 & prin_acty_stat <= 51



*Casual workers
* 1 for principal-activity status codes 11, 21, 41 and 51; 0 for every other value
gen casualworker = inlist(prin_acty_stat, 11, 21, 41, 51)



*Works in an informal firm
* formal_firm starts at 0 only where both numworkers and electricity are observed.
gen formal_firm=0 if !missing(numworkers) & !missing(electricity)
* Stata orders missing values above every number, so a bare numworkers>=4 is true when
* numworkers is missing. The explicit guard keeps formal_firm missing in that case.
* NOTE(review): numworkers looks like a coded size band - confirm the cutoffs 3 and 4.
replace formal_firm=1 if (numworkers>=4 | (numworkers>=3 & electricity==1)) & !missing(numworkers)
* informalfirm is the complement of formal_firm
recode formal_firm (0=1) (1=0), gen(informalfirm)
replace informalfirm = . if missing(numworkers)
* Principal-activity status codes 11 and 21 are always classified as informal-firm
replace informalfirm = 1 if inlist(prin_acty_stat, 11, 21)
* The measure is blanked out for the 199394 and 200304 values of year
replace informalfirm=. if year==199394|year==200304



*Is a formal worker
* formalworker is 1 for the listed mode_pay1 codes, 0 for other observed codes, missing otherwise.
* The original list repeated code 18 where the pattern 3 4, 13 14, 18 19, x 29 calls for 28,
* so code 28 was never matched.
* NOTE(review): confirm against the survey code list that 28 is the intended code.
gen formalworker=0
replace formalworker =1 if inlist(mode_pay1, 3, 4, 13, 14, 18, 19, 28, 29)
replace formalworker = . if missing(mode_pay1)
* informalworker is the complement, forced to 1 for principal-activity status 21
recode formalworker (0=1) (1=0), gen(informalworker)
replace informalworker=1 if prin_acty_stat == 21
* The measure is blanked out for the 199394 and 200304 values of year
replace informalworker=. if year==199394|year==200304


*Classifying weekly wages as formal as well
* Same construction as formalworker with the codes 2, 12, 17 and 27 added.
* The original list repeated code 18 where the pattern 27 28 29 calls for 28,
* so code 28 was never matched.
* NOTE(review): confirm against the survey code list that 28 is the intended code.
gen formalworker2=0
replace formalworker2 =1 if inlist(mode_pay1, 2, 3, 4, 12, 13, 14, 17, 18, 19, 27, 28, 29)
replace formalworker2 = . if missing(mode_pay1)
* informalworker2 is the complement, forced to 1 for principal-activity status 21
recode formalworker2 (0=1) (1=0), gen(informalworker2)
replace informalworker2=1 if prin_acty_stat == 21
* The measure is blanked out for the 199394 and 200304 values of year
replace informalworker2=. if year==199394|year==200304


*Daily Wages
* dwage_wind copies dwage but is missing above the 99th percentile of dwage and for
* principal-activity status codes 11 through 21. Values are removed, not capped.
quietly summarize dwage, detail
gen dwage_wind = dwage if !(dwage > r(p99)) & !inrange(prin_acty_stat, 11, 21)
* Natural logs of the raw and the trimmed daily wage
gen lndwage = log(dwage)
gen lndwage_wind = log(dwage_wind)

*Hourly wages
* Trimmed daily wage divided by 8
gen hwage_wind = dwage_wind/8
gen lnhwage_wind = log(hwage_wind)

*Generating the variables excluding household work
* Each _nohh copy equals the source variable where sample_nohh is not 0 and is missing where it is 0
foreach v of varlist casualworker informalfirm informalworker informalworker2 lndwage_wind {
	gen `v'_nohh = `v' if sample_nohh != 0
}

*Individual controls
* highedu: 1 for genedu_recode 4 or 5, 0 for 0 through 3
recode  genedu_recode (0 1 2 3 =0) (4 5=1), gen(highedu)

*Any high education is 1 if technical education or college and above general education is obtained 
* A missing tech_edu would satisfy tech_edu!=1, so it is excluded explicitly.
* Where tech_edu is missing and genedu_recode is not 5 the flag is left missing.
gen anyhighedu = 0
replace anyhighedu=1 if genedu_recode==5 | (tech_edu!=1 & !missing(tech_edu))
replace anyhighedu=. if genedu_recode!=5 & missing(tech_edu)

***Unemployed
* NOTE(review): rows with prin_acty_stat above 51 are dropped earlier in this file, so this
* flag looks like it is 0 for every remaining row - confirm whether that is intended.
gen unemployed = (prin_acty_stat==81)

***Agricultural worker
* 1 when indcode is below 10, missing when indcode is missing
gen agworker = (indcode<10) 
replace agworker =. if indcode==.

gen age2 = age*age

* 1 when marstatus is 2 or above, missing when marstatus is missing
gen married = (marstatus>=2) 
replace married = . if marstatus==.

* education: 0 for gen_edu below 7, 1 for 7 or 8, 2 for 9 and above.
* Missing gen_edu sorts above 9 in Stata, so it is excluded from the top group and set to missing.
gen education =0
replace education = 1 if gen_edu==8|gen_edu==7
replace education = 2 if gen_edu>=9 & !missing(gen_edu)
replace education = . if missing(gen_edu)

* techedu_recode: 1 when tech_edu is observed and differs from 1, missing when tech_edu is missing
gen techedu_recode =0
replace techedu_recode =1 if  tech_edu!=1 & !missing(tech_edu)
replace techedu_recode =. if missing(tech_edu)

* Dcode numbers each state-district combination
egen Dcode = group(state district)

* Placebo interaction: treat times an indicator for year 199900
g fakepost = (year==199900)
g placebo = fakepost*treat

* One indicator per distinct value of year, named year1, year2 and so on in ascending order
tab year, gen(year)
* Interact the first five year indicators with treat.
* assumes the five lowest values of year are 199394, 199900, 200304, 200405, 200910 - TODO confirm
local k = 0
foreach yy in 9394 9900 0304 0405 0910 {
	local ++k
	gen treat`yy' = year`k' * treat
}
* zero is a constant 0 that carries the 1999-00 label
gen zero=0

label variable treat9394 "1993-94" 
label variable treat9900 "1999-00"
label variable treat0304 "2003-04"
label variable treat0405 "2004-05"
* treat0910 was the only interaction left without a label
label variable treat0910 "2009-10"
label variable zero "1999-00"
label variable posttreat "Post X Treat"
label variable placebo "Placebo"

* Two-digit occupation code: the first two characters when the code is longer than two
* characters, otherwise the code itself
gen occlen = strlen(prin_acty_occ)
gen prin_occ_2digit = cond(occlen > 2, substr(prin_acty_occ, 1, 2), prin_acty_occ)
* Convert both occupation variables from string to numeric
destring prin_occ_2digit prin_acty_occ, replace


* informal_intensive is defined only where informalfirm is 0:
* 1 when casualworker or informalworker equals 1, and 0 otherwise
gen informal_intensive = (casualworker==1 | informalworker==1) if informalfirm == 0
* Shifted copy taking the values 1 and 2
gen informal_intensive2 = informal_intensive + 1

* Restrict the sample to rows with a defined measure and drop two values of year
drop if missing(informal_intensive)
drop if inlist(year, 199394, 201112)

* n is a unit counter; N is the number of remaining rows in each year
gen n = 1
egen N = total(n), by(year)

*residualize wages 
* lnwage is the natural log of the daily wage
gen lnwage = ln(dwage)

global controls  "age age2"
global fe "sex religion sgroup married gen_edu Dcode"
* Regress log wages on the controls with the listed absorbed effects and store the
* residual in what, then exponentiate it
qui reghdfe lnwage ${controls}, absorb(${fe}) res(what)
replace what = exp(what)

* Floor at the 1st percentile and cap at the 95th percentile.
* Stata orders missing values above every number, so the cap must skip missing values
* explicitly; otherwise every missing wage or residual is overwritten with the 95th percentile.
* NOTE(review): per-group counts used with these variables should count non-missing values only.
foreach var of varlist dwage what {
	qui summ `var', de 
	replace `var' = r(p1) if `var'<r(p1)
	replace `var' = r(p95) if `var'>r(p95) & !missing(`var')
}

* Snapshot of the analysis sample in a temporary file
tempfile data 
save `data', replace 

 
*************************************************
* DISTRIBUTION OF INFORMAL AND FORMAL WORKERS
*************************************************
* Plot the cumulative distribution of the occupation-level mean of informal_intensive,
* with reference lines at its 25th and 75th percentiles. The data in memory is restored afterwards.
* NOTE(review): the axis title says Contract Workers while the plotted variable is
* informal_intensive - confirm the wording.
preserve
collapse (mean) informal_intensive, by(prin_occ_2digit)
sort informal_intensive
gen occ = _n
summarize informal_intensive, detail
* Store the percentiles as locals p10, p25, p50, p75 and p90
foreach q in 10 25 50 75 90 {
	local p`q' = r(p`q')
}

cdfplot informal_intensive, opt1(lcolor(black) lw(medthick)) ///
		xline(`p25' `p75', lcolor(gs10) lp(dash))  ///
		xtitle("Fraction of Contract Workers") xlabel(0(0.1)1)
graph export "${output}/nss/FigureC1.png", replace
restore



*************************************************
* THEIL INDEX
*************************************************
* Share of the Theil index of informal_intensive2 that lies within two-digit occupations,
* computed for each treat-year combination. The data in memory is restored afterwards.
* NOTE(review): mu and N are year-level quantities while the sums below run by treat and
* year - confirm which reference population is intended.
preserve
global x  = "informal_intensive2"
global cat = "prin_occ_2digit"
global t = "treat year"

* Year-level mean and the row-level Theil term. T is not in the keep list further down,
* so it is discarded before the decomposed T is built.
egen mu = mean(${x}), by(year)
gen T = (${x}/mu) * ln(${x}/mu)

*decomposing theil index 
* Number of occupation categories and their levels, stored in globals
qui tabulate ${cat}
global No = `r(r)'
qui levelsof ${cat}
global rlevs "`r(levels)'"

* Cell-level mean, count, within-cell Theil term and share
egen mu_i = mean(${x}), by(${cat} ${t})
egen N_i = total(n), by(${cat} ${t})
egen T_i = mean(${x}/mu_i * ln(${x}/mu_i)), by(${cat} ${t})
gen s_i = (N_i/N) * (mu_i/mu)

* Reduce to one row per cell
keep mu_i N_i T_i s_i mu N ${cat} ${t}
duplicates drop
sort ${cat} ${t}

*gcollapse (firstnm) mu_i N_i T_i s_i mu N, by($t $cat)

* Total index, its within component, and the within share
egen T = total(s_i*T_i + s_i*ln(mu_i/mu)), by(${t})
egen sT = total(s_i*T_i), by(${t})
gen frac_main = sT/T
* One row per treat-year combination
duplicates drop ${t} frac_main, force
mean frac_main if year!=199394
mean frac_main, over(year)
restore


* Share of the Theil index of dwage that lies within two-digit occupations, by year.
* The data in memory is restored afterwards.
preserve
global x  = "dwage"
global cat = "prin_occ_2digit"

* The means below skip rows with a missing outcome, so the counts must skip them too.
* Drop those rows and rebuild the year count N inside this preserve so that the shares
* s_i refer to the same rows as the means.
drop if missing(${x})
capture drop N
egen N = total(n), by(year)

egen mu = mean(${x}), by(year)

*decomposing theil index 
* Cell-level mean, count, within-cell Theil term and share
egen mu_i = mean(${x}), by(${cat} year)
egen N_i = total(n), by(${cat} year)

egen T_i = mean(${x}/mu_i * ln(${x}/mu_i)), by(${cat} year)
gen s_i = N_i/N * (mu_i/mu)

* One row per year and occupation
gcollapse (firstnm) mu_i N_i T_i s_i mu N, by(year ${cat})

* Total index, between component, within component, and the within share
egen T = total(s_i*T_i + s_i*ln(mu_i/mu)), by(year)
egen across = total(s_i*ln(mu_i/mu)), by(year)
egen sT = total(s_i*T_i), by(year)
gen frac = sT/T
mean frac if year!=199394
mean frac , over(year)
restore





* Share of the Theil index of what that lies within two-digit occupations, by year.
* what is the exponentiated wage residual built earlier in this file.
* The data in memory is restored afterwards.
preserve
global x  = "what"
global cat = "prin_occ_2digit"

* The means below skip rows with a missing outcome, so the counts must skip them too.
* Drop those rows and rebuild the year count N inside this preserve so that the shares
* s_i refer to the same rows as the means.
drop if missing(${x})
capture drop N
egen N = total(n), by(year)

egen mu = mean(${x}), by(year)

*decomposing theil index 
* Cell-level mean, count, within-cell Theil term and share
egen mu_i = mean(${x}), by(${cat} year)
egen N_i = total(n), by(${cat} year)

egen T_i = mean(${x}/mu_i * ln(${x}/mu_i)), by(${cat} year)
gen s_i = N_i/N * (mu_i/mu)

* One row per year and occupation
gcollapse (firstnm) mu_i N_i T_i s_i mu N, by(year ${cat})

* Total index, between component, within component, and the within share
egen T = total(s_i*T_i + s_i*ln(mu_i/mu)), by(year)
egen across = total(s_i*ln(mu_i/mu)), by(year)
egen sT = total(s_i*T_i), by(year)
gen frac = sT/T
mean frac
mean frac, over(year)
restore







